{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "75417ef5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from pandas import Series, DataFrame\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "2dbd4a89",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the complete 2020 NYC parking-violations CSV into one DataFrame.\n",
    "# low_memory=False makes pandas infer each column's dtype from the whole\n",
    "# file instead of chunk by chunk, which avoids mixed-type columns.\n",
    "filename = '../data/nyc-parking-violations-2020.csv'\n",
    "df = pd.read_csv(\n",
    "    filename,\n",
    "    low_memory=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "76aa5472",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Filename prefix of the output files (used later to find them with glob).\n",
    "root = 'parking-violations'\n",
    "\n",
    "# Format label -> bound method of `df` that writes the frame in that format.\n",
    "write_methods = {\n",
    "    'JSON': df.to_json,\n",
    "    'CSV': df.to_csv,\n",
    "    'feather': df.to_feather,\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "0c0bb56a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Saving in JSON\n",
      "\tWriting JSON: total_time=47.94986385299126\n",
      "Saving in CSV\n",
      "\tWriting CSV: total_time=84.28116728103487\n",
      "Saving in feather\n",
      "\tWriting feather: total_time=10.2521946990164\n"
     ]
    }
   ],
   "source": [
    "# Time how long each format takes to write the full DataFrame to disk.\n",
    "for one_format, method in write_methods.items():\n",
    "    print(f'Saving in {one_format}')\n",
    "    # The lowercased format label doubles as the file extension; the name is\n",
    "    # built from `root` so it always matches the prefix globbed for later.\n",
    "    output_filename = f'{root}.{one_format.lower()}'\n",
    "\n",
    "    start_time = time.perf_counter()\n",
    "    method(output_filename)\n",
    "    end_time = time.perf_counter()\n",
    "\n",
    "    total_time = end_time - start_time\n",
    "    print(f'\\tWriting {one_format}: {total_time=}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "e20e3ab7",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "parking-violations.json    : 8,820,247,015\n",
      "parking-violations.csv     : 2,440,860,181\n",
      "parking-violations.feather : 1,466,536,058\n"
     ]
    }
   ],
   "source": [
    "# Report the size in bytes of every file whose name starts with `root`.\n",
    "import glob\n",
    "import os\n",
    "\n",
    "matching_filenames = glob.glob(f'{root}*')\n",
    "for one_filename in matching_filenames:\n",
    "    size_in_bytes = os.stat(one_filename).st_size\n",
    "    # Pad the name to 27 columns so the sizes line up.\n",
    "    print(f'{one_filename:27}: {size_in_bytes:,}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "258f10b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Format label -> pandas function that reads a file of that format.\n",
    "read_methods = {\n",
    "    'JSON': pd.read_json,\n",
    "    'CSV': pd.read_csv,\n",
    "    'feather': pd.read_feather,\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "05175935",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reading from JSON\n",
      "\tReading JSON: total_time=512.0497572919703\n",
      "Reading from CSV\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/rr/0mnyyv811fs5vyp22gf4fxk00000gn/T/ipykernel_52668/1751173684.py:4: DtypeWarning: Columns (19,30,39,40) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  df = read_methods[one_format](f'parking-violations.{one_format.lower()}')\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\tReading CSV: total_time=44.657161787035875\n",
      "Reading from feather\n",
      "\tReading feather: total_time=13.85696751094656\n"
     ]
    }
   ],
   "source": [
    "# Time how long each format takes to load back into a DataFrame.\n",
    "# Note: each iteration rebinds `df` to the frame that was just read.\n",
    "for one_format, method in read_methods.items():\n",
    "    print(f'Reading from {one_format}')\n",
    "    # The lowercased format label doubles as the file extension; the name is\n",
    "    # built from `root`, the same prefix used to glob for the files.\n",
    "    input_filename = f'{root}.{one_format.lower()}'\n",
    "\n",
    "    start_time = time.perf_counter()\n",
    "    df = method(input_filename)\n",
    "    end_time = time.perf_counter()\n",
    "\n",
    "    total_time = end_time - start_time\n",
    "    print(f'\\tReading {one_format}: {total_time=}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "404fa135",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
